데이터 출처: http://kosis.kr/ups/ups_01List.jsp?pubcode=AD 또는 http://kosis.kr/upsHtml/online.do?isOnline=Y&PART=G&pubcode=AD (내려받기 주소 예: http://kosis.kr/upsHtml/online/downSrvcFile.do?PUBCODE=AD&SEQ=2959&FILE_NAME=030117.xlsx). 원하는 xlsx 파일을 data/xls 디렉터리에 저장한 후 읽어 들인다.
# Attach the packages this script relies on. library() stops with an error
# when a package is missing, whereas require() only returns FALSE and lets the
# script fail later with a less helpful message.
library(readxl)
# dplyr (select/filter/arrange, %>%) and ggplot2 are used by the steps below.
library(dplyr)
library(ggplot2)

# Read the first sheet of the downloaded workbook into `xdf`.
xls_file <- "data/xls/2015년_030117.xlsx"
xdf <- read_excel(xls_file)

# Quick look at the first rows and at the available column names.
head(xdf)
colnames(xdf)
[1] "전출지" "전입지" "계" "0~4세" "5~9세" "10~14세"
[7] "15~19세" "20~24세" "25~29세" "30~34세" "35~39세" "40~44세"
[13] "45~49세" "50~54세" "55~59세" "60~64세" "65~69세" "70~74세"
[19] "75~79세" "80세이상" "남(계)" "남(0~4세)" "남(5~9세)" "남(10~14세)"
[25] "남(15~19세)" "남(20~24세)" "남(25~29세)" "남(30~34세)" "남(35~39세)" "남(40~44세)"
[31] "남(45~49세)" "남(50~54세)" "남(55~59세)" "남(60~64세)" "남(65~69세)" "남(70~74세)"
[37] "남(75~79세)" "남(80세이상)" "여(계)" "여(0~4세)" "여(5~9세)" "여(10~14세)"
[43] "여(15~19세)" "여(20~24세)" "여(25~29세)" "여(30~34세)" "여(35~39세)" "여(40~44세)"
[49] "여(45~49세)" "여(50~54세)" "여(55~59세)" "여(60~64세)" "여(65~69세)" "여(70~74세)"
[55] "여(75~79세)" "여(80세이상)" "전출지코드" "전입지코드"
전입지, 전출지, 계(전체·남·여)만 남기고 다른 컬럼들은 무시한다. 전입지, 전출지 중에서 시군구 레벨만 남긴다('세종'은 예외로 포함). 같은 지역 안에서의 이동은 제외하고 외부로의 이동만 포함한다.
# Build the flow table `df` with columns from, to, value, mvalue, fvalue.
#
# * Columns are picked by name with select(); "남(계)" and "여(계)" are the
#   21st and 39th columns of the sheet (see the printed column names), so
#   naming them is equivalent to the positional lookup but survives a change
#   in column order.
# * 전출지 is the place people moved out of (origin) and 전입지 the place they
#   moved into (destination), so 전출지 maps to `from` and 전입지 to `to`.
# * Rows are kept only when both place names are longer than two characters
#   (described in the accompanying note as the 시군구 level) or equal '세종'.
# * Rows whose origin and destination are the same are dropped.
df <- xdf %>%
  select(
    from = "전출지",
    to = "전입지",
    value = "계",
    mvalue = "남(계)",
    fvalue = "여(계)"
  ) %>%
  filter(from == '세종' | nchar(from) > 2) %>%
  filter(to == '세종' | nchar(to) > 2) %>%
  filter(from != to)
head(df)
제일 많이 빠져나간 곳은 어딜까?
# Show the largest flows first.
sdf <- df %>% arrange(desc(value))
head(sdf)

# Re-derive `sdf` as the flows above 1000 movers and plot how their sizes
# are distributed, in bins of 500.
sdf <- df %>% filter(value > 1000)
ggplot(data = sdf, mapping = aes(x = value)) +
  geom_histogram(binwidth = 500)
남녀 간 이동의 차이
# Histogram of (male movers - female movers) per origin/destination pair,
# in bins of 5.
# The y axis is zoomed to 0..1000 with coord_cartesian() so tall bars are
# clipped at the top of the panel; a ylim() scale limit would instead drop
# every bar whose count exceeds 1000 and leave gaps in the plot.
ggplot(df, aes(mvalue - fvalue)) +
  geom_histogram(binwidth = 5) +
  coord_cartesian(ylim = c(0, 1000))

# Average male-minus-female difference across all pairs.
mean(df$mvalue - df$fvalue)
[1] 2.624549
# Keep only flows above 2000 movers, ordered by origin name.
sdf <- filter(df, value > 2000) %>% arrange(from)

# library() stops immediately if circlize is not installed; require() would
# only return FALSE.
library(circlize)

# Font family used for the sector labels.
par(family='Apple SD Gothic Neo')

# Hand chordDiagram() a plain data.frame with just from / to / value.
# Passing the full tibble triggers tibble's "Unknown column 'rank'" warning,
# and the extra numeric columns (mvalue, fvalue) are not meant to influence
# the link widths.
# NOTE(review): how circlize treats a 4th/5th numeric column depends on the
# installed version - confirm against the chordDiagram documentation.
# The first track is pre-allocated (height 0.3) so labels can be drawn there.
chordDiagram(as.data.frame(sdf[, c("from", "to", "value")]),
             annotationTrack = "grid",
             preAllocateTracks = list(track.height = 0.3))
Unknown column 'rank'
# Return to the first track and write each sector's name into it.
circos.trackPlotRegion(
  track.index = 1,
  # Setting bg.border to NA here is important.
  bg.border = NA,
  panel.fun = function(x, y) {
    # Metadata of the cell currently being drawn.
    sector_label <- get.cell.meta.data("sector.index")
    x_range <- get.cell.meta.data("xlim")
    y_range <- get.cell.meta.data("ylim")

    # Anchor the label at the horizontal middle and the bottom edge of the
    # cell, written clockwise.
    circos.text(
      mean(x_range), y_range[1], sector_label,
      facing = "clockwise",
      niceFacing = TRUE,
      adj = c(0, 0.5)
    )
  }
)
네트워크로 표현해볼까.
library(igraph)

# Edge list: flows above 1000 movers, ordered by the `from` column.
ndf <- df %>%
  filter(value > 1000) %>%
  arrange(from)

# Build the graph; the first two columns of `ndf` define the edges and the
# remaining columns are stored as edge attributes.
g <- graph_from_data_frame(ndf)
그려보자
# Font family for the plot device.
par(family='Apple SD Gothic Neo')

# Factor that converts mover counts into line widths.
edgeScale <- 0.002
flow_width <- E(g)$value * edgeScale

# Draw the network: small vertices with large labels, semi-transparent grey
# edges whose width follows the flow size.
# NOTE(review): edge.arrow.size receives one value per edge here; igraph may
# only honour the first element - confirm against the igraph plotting docs.
plot(
  g,
  vertex.size = 2,
  vertex.label.family = 'Apple SD Gothic Neo',
  vertex.label.cex = 2,
  edge.width = flow_width,
  edge.arrow.size = flow_width * 0.4,
  edge.color = rgb(0.2, 0.2, 0.2, .2)
)